Purpose:

Runs survival analysis models using splicing cluster assignment and 1) single exon splicing burden index (SBI) 2) KEGG Spliceosome GSVA scores or 3) CLK1 exon 4 TPM as a predictor

Usage

Uses a wrapper function (survival_analysis) from utils folder.

Setup

Packages and functions

Load packages, set directory paths and call setup script

library(tidyverse)
library(survival)
library(ggpubr)
library(ggplot2)
library(patchwork)

root_dir <- rprojroot::find_root(rprojroot::has_dir(\.git\))

data_dir <- file.path(root_dir, \data\)
analysis_dir <- file.path(root_dir, \analyses\, \survival\)
input_dir <- file.path(analysis_dir, \results\)
results_dir <- file.path(analysis_dir, \results\)
plot_dir <- file.path(analysis_dir, \plots\)

# If the input and results directories do not exist, create it
if (!dir.exists(results_dir)) {
  dir.create(results_dir, recursive = TRUE)
}

source(file.path(analysis_dir, \util\, \survival_models.R\))
knitr::opts_chunk$set(cache = FALSE)

Set metadata and cluster assignment file paths

metadata_file <- file.path(input_dir, \splicing_indices_with_survival.tsv\)

cluster_file <- file.path(root_dir, \analyses\,
                          \sample-psi-clustering\, \results\,
                          \sample-cluster-metadata-top-5000-events-stranded.tsv\)

kegg_scores_stranded_file <- file.path(root_dir, \analyses\,
                          \sample-psi-clustering\, \results\,
                          \gsva_output_stranded.tsv\)

tpm_file <- file.path(data_dir, \rna-isoform-expression-rsem-tpm.rds\)
clk1_psi_file <- file.path(root_dir, 
                           \analyses\, 
                           \CLK1-splicing_correlations\, 
                           \results\, 
                           \clk1-exon4-psi.tsv\)

Wrangle data Add cluster assignment and spliceosome gsva scores to metadata and define column lgg_group (LGG or non_LGG)

metadata <- read_tsv(metadata_file)
clusters <- read_tsv(cluster_file) %>%
  dplyr::rename(Kids_First_Biospecimen_ID = sample_id)
clk1_psi <- read_tsv(clk1_psi_file) %>%
  dplyr::rename(CLK1_ex4_PSI = PSI) %>%
  select(-plot_group)
gsva_scores <- read_tsv(kegg_scores_stranded_file) %>%
  dplyr::filter(geneset == \KEGG_SPLICEOSOME\) %>%
  dplyr::rename(spliceosome_gsva_score = score)
all_clk4_transcr_counts <- readRDS(tpm_file) %>%
  filter(grepl(\^CLK1\, gene_symbol)) %>%
  mutate(
    transcript_id = case_when(
      transcript_id %in% c(\ENST00000321356.9\, \ENST00000434813.3\, \ENST00000409403.6\) ~ \Exon 4\,
      # transcript_id == \ENST00000321356.9\ ~ \Exon 4\,
      TRUE ~ \Other\
    )
  ) %>%
  group_by(transcript_id) %>%
  summarise(across(starts_with(\BS\), sum, na.rm = TRUE)) %>%
  pivot_longer(cols = -transcript_id, names_to = \Kids_First_Biospecimen_ID\, values_to = \CLK1_Ex4_TPM\) %>%
  filter(transcript_id == \Exon 4\) %>%
  inner_join(clusters, by = \Kids_First_Biospecimen_ID\) %>%
  left_join(clk1_psi)
# how many clusters?
n_clust <- length(unique(clusters$cluster))

metadata <- metadata %>%
  right_join(all_clk4_transcr_counts %>% dplyr::select(Kids_First_Biospecimen_ID,
                                       cluster, CLK1_Ex4_TPM, CLK1_ex4_PSI)) %>%
  left_join(gsva_scores %>% dplyr::select(sample_id,
                                          spliceosome_gsva_score),
            by = c(\Kids_First_Biospecimen_ID\ = \sample_id\)) %>% 
  dplyr::mutate(cluster = glue::glue(\Cluster {cluster}\)) %>%
  # dplyr::mutate(cluster = fct_relevel(cluster,
  #                                             paste0(\Cluster \, 1:n_clust))) %>%
  dplyr::mutate(cluster = forcats::fct_relevel(cluster, \Cluster 6\, after = 0)) %>%
  dplyr::mutate(lgg_group = case_when(
    plot_group == \Low-grade glioma\ ~ \LGG\,
    TRUE ~ \non-LGG\
  )) %>%
  dplyr::mutate(SBI = SI_Total * 10) %>%
  dplyr::mutate(age_at_diagnosis_years = age_at_diagnosis_days/365.25)

Generate coxph models including extent of tumor resection, lgg group, cluster assignment, SBI, and CLK1 exon 4 TPM as covariates

add_model_os <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c(\Not Reported\, \Unavailable\),],
                              terms = \extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+SBI+CLK1_Ex4_TPM\,
                               file.path(results_dir, \cox_OS_additive_terms_resection_lgg_group_cluster_SBI_CLK1_Ex4_TPM.RDS\),
                               \multivariate\,
                               years_col = \OS_years\,
                               status_col = \OS_status\)

forest_os <- plotForest(readRDS(file.path(results_dir, \cox_OS_additive_terms_resection_lgg_group_cluster_SBI_CLK1_Ex4_TPM.RDS\)))

forest_os

ggsave(file.path(plot_dir, \forest_add_OS_resection_lgg_group_cluster_assignment_SBI_CLK1_Ex4_TPM.pdf\),
       forest_os,
       width = 10, height = 6, units = \in\,
       device = \pdf\)

add_model_efs <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c(\Not Reported\, \Unavailable\),],
                              terms = \extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+SBI+CLK1_Ex4_TPM\,
                               file.path(results_dir, \cox_EFS_additive_terms_resection_lgg_group_cluster_SBI_CLK1_Ex4_TPM.RDS\),
                               \multivariate\,
                               years_col = \EFS_years\,
                               status_col = \EFS_status\)

forest_efs <- plotForest(readRDS(file.path(results_dir, \cox_EFS_additive_terms_resection_lgg_group_cluster_SBI_CLK1_Ex4_TPM.RDS\)))

forest_efs

ggsave(file.path(plot_dir, \forest_add_EFS_resection_lgg_group_cluster_assignment_SBI_CLK1_Ex4_TPM.pdf\),
       forest_efs,
       width = 10, height = 6, units = \in\,
       device = \pdf\)

repeat analysis with CLK1 exon 4 TPM alone

add_model_os <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c(\Not Reported\, \Unavailable\),],
                              terms = \extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+CLK1_Ex4_TPM\,
                               file.path(results_dir, \cox_OS_additive_terms_resection_lgg_group_cluster_CLK1_Ex4_TPM.RDS\),
                               \multivariate\,
                               years_col = \OS_years\,
                               status_col = \OS_status\)

forest_os <- plotForest(readRDS(file.path(results_dir, \cox_OS_additive_terms_resection_lgg_group_cluster_CLK1_Ex4_TPM.RDS\)))

forest_os

ggsave(file.path(plot_dir, \forest_add_OS_resection_lgg_group_cluster_assignment_CLK1_Ex4_TPM.pdf\),
       forest_os,
       width = 10, height = 6, units = \in\,
       device = \pdf\)


add_model_efs <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c(\Not Reported\, \Unavailable\),],
                              terms = \extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+CLK1_Ex4_TPM\,
                               file.path(results_dir, \cox_EFS_additive_terms_resection_lgg_group_cluster_CLK1_Ex4_TPM.RDS\),
                               \multivariate\,
                               years_col = \EFS_years\,
                               status_col = \EFS_status\)

forest_efs <- plotForest(readRDS(file.path(results_dir, \cox_EFS_additive_terms_resection_lgg_group_cluster_CLK1_Ex4_TPM.RDS\)))

forest_efs

ggsave(file.path(plot_dir, \forest_add_EFS_resection_lgg_group_cluster_assignment_CLK1_Ex4_TPM.pdf\),
       forest_efs,
       width = 10, height = 6, units = \in\,
       device = \pdf\)

repeat analysis with CLK1 exon 4 PSI

add_model_os <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c(\Not Reported\, \Unavailable\),],
                              terms = \extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+CLK1_ex4_PSI\,
                               file.path(results_dir, \cox_OS_additive_terms_resection_lgg_group_cluster_CLK1_ex4_PSI.RDS\),
                               \multivariate\,
                               years_col = \OS_years\,
                               status_col = \OS_status\)

forest_os <- plotForest(readRDS(file.path(results_dir, \cox_OS_additive_terms_resection_lgg_group_cluster_CLK1_ex4_PSI.RDS\)))

forest_os

ggsave(file.path(plot_dir, \forest_add_OS_resection_lgg_group_cluster_assignment_CLK1_ex4_PSI.pdf\),
       forest_os,
       width = 10, height = 6, units = \in\,
       device = \pdf\)

add_model_efs <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c(\Not Reported\, \Unavailable\),],
                              terms = \extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+CLK1_ex4_PSI\,
                               file.path(results_dir, \cox_EFS_additive_terms_resection_lgg_group_cluster_CLK1_ex4_PSI.RDS\),
                               \multivariate\,
                               years_col = \EFS_years\,
                               status_col = \EFS_status\)

forest_efs <- plotForest(readRDS(file.path(results_dir, \cox_EFS_additive_terms_resection_lgg_group_cluster_CLK1_ex4_PSI.RDS\)))

forest_efs

ggsave(file.path(plot_dir, \forest_add_EFS_resection_lgg_group_cluster_assignment_CLK1_ex4_PSI.pdf\),
       forest_efs,
       width = 10, height = 6, units = \in\,
       device = \pdf\)

Interaction with GSVA, SBI, CLK1

models <- c("spliceosome_gsva_score", "SBI", "CLK1_Ex4_TPM", "CLK1_ex4_PSI")
# by cluster
for (each in models) {
  int_model_efs <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
                                terms = paste0("extent_of_tumor_resection+lgg_group+cluster*", each, "+age_at_diagnosis_years"),
                                 file.path(results_dir, paste0("cox_EFS_interaction_terms_resection_lgg_group_cluster_", each, ".RDS")),
                                 "multivariate",
                                 years_col = "EFS_years",
                                 status_col = "EFS_status")
  
  int_forest_efs <- plotForest(readRDS(file.path(results_dir, paste0("cox_EFS_interaction_terms_resection_lgg_group_cluster_", each, ".RDS"))))
  
  int_forest_efs
  
  ggsave(file.path(plot_dir, paste0("forest_int_EFS_resection_lgg_group_cluster_assignment_", each, ".pdf")),
         int_forest_efs,
         width = 10, height = 6, units = "in",
         device = "pdf")

  int_model_os <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
                                terms = paste0("extent_of_tumor_resection+lgg_group+cluster*", each, "+age_at_diagnosis_years"),
                                 file.path(results_dir, paste0("cox_OS_interaction_terms_resection_lgg_group_cluster_", each, ".RDS")),
                                 "multivariate",
                                 years_col = "OS_years",
                                 status_col = "OS_status")
  
  int_forest_os <- plotForest(readRDS(file.path(results_dir, paste0("cox_OS_interaction_terms_resection_lgg_group_cluster_", each, ".RDS"))))
  
  int_forest_os
  
  ggsave(file.path(plot_dir, paste0("forest_int_OS_resection_lgg_group_cluster_assignment_", each, ".pdf")),
         int_forest_os,
         width = 10, height = 6, units = "in",
         device = "pdf")
}

## clk1 x age
  int_model_efs <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
                                terms = paste0("extent_of_tumor_resection+lgg_group+cluster+CLK1_Ex4_TPM*age_at_diagnosis_years"),
                                 file.path(results_dir, paste0("cox_EFS_interaction_terms_resection_lgg_group_cluster_CLK1_Ex4_TPM_age.RDS")),
                                 "multivariate",
                                 years_col = "EFS_years",
                                 status_col = "EFS_status")
  
  int_forest_efs <- plotForest(readRDS(file.path(results_dir, paste0("cox_EFS_interaction_terms_resection_lgg_group_cluster_CLK1_Ex4_TPM_age.RDS"))))
  
  int_forest_efs
  
  ggsave(file.path(plot_dir, paste0("forest_int_EFS_resection_lgg_group_cluster_clk1_age.pdf")),
         int_forest_efs,
         width = 10, height = 6, units = "in",
         device = "pdf")

  int_model_os <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
                                terms = paste0("extent_of_tumor_resection+lgg_group+cluster+CLK1_Ex4_TPM*age_at_diagnosis_years"),
                                 file.path(results_dir, paste0("cox_OS_interaction_terms_resection_lgg_group_cluster_clk1_age.RDS")),
                                 "multivariate",
                                 years_col = "OS_years",
                                 status_col = "OS_status")
  
  int_forest_os <- plotForest(readRDS(file.path(results_dir, paste0("cox_OS_interaction_terms_resection_lgg_group_cluster_clk1_age.RDS"))))
  
  int_forest_os
  
  ggsave(file.path(plot_dir, paste0("forest_int_OS_resection_lgg_group_cluster_clk1_age.pdf")),
         int_forest_os,
         width = 10, height = 6, units = "in",
         device = "pdf")

models2 <- c("SBI", "CLK1_Ex4_TPM", "CLK1_ex4_PSI")
for (each in models2) {
  #### by spliceosome_gsva_score
  int_model_efs <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
                                terms = paste0("extent_of_tumor_resection+lgg_group+cluster+spliceosome_gsva_score*", each, "+age_at_diagnosis_years"),
                                 file.path(results_dir, paste0("cox_EFS_interaction_terms_resection_lgg_group_cluster_spliceosome_gsva_score_", each, ".RDS")),
                                 "multivariate",
                                 years_col = "EFS_years",
                                 status_col = "EFS_status")
  
  int_forest_efs <- plotForest(readRDS(file.path(results_dir, paste0("cox_EFS_interaction_terms_resection_lgg_group_cluster_spliceosome_gsva_score_", each, ".RDS"))))
  
  int_forest_efs
  
  ggsave(file.path(plot_dir, paste0("cox_EFS_interaction_terms_resection_lgg_group_cluster_spliceosome_gsva_score_", each, ".pdf")),
         int_forest_efs,
         width = 10, height = 6, units = "in",
         device = "pdf")
}
  


add_model_efs <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
                              terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+spliceosome_gsva_score+CLK1_Ex4_TPM",
                               file.path(results_dir, "cox_EFS_additive_terms_resection_lgg_group_cluster_spliceosome_score_CLK1_Ex4_TPM.RDS"),
                               "multivariate",
                               years_col = "EFS_years",
                               status_col = "EFS_status")

forest_efs <- plotForest(readRDS(file.path(results_dir, "cox_EFS_additive_terms_resection_lgg_group_cluster_spliceosome_score_CLK1_Ex4_TPM.RDS")))

forest_efs

ggsave(file.path(plot_dir, "forest_add_EFS_resection_lgg_group_cluster_assignment_spliceosome_score_CLK1_Ex4_TPM.pdf"),
       forest_efs,
       width = 10, height = 6, units = "in",
       device = "pdf")


add_model_os <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
                              terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+spliceosome_gsva_score+CLK1_Ex4_TPM",
                               file.path(results_dir, "cox_OS_additive_terms_resection_lgg_group_cluster_spliceosome_score_CLK1_Ex4_TPM.RDS"),
                               "multivariate",
                               years_col = "EFS_years",
                               status_col = "EFS_status")

forest_os <- plotForest(readRDS(file.path(results_dir, "cox_OS_additive_terms_resection_lgg_group_cluster_spliceosome_score_CLK1_Ex4_TPM.RDS")))

forest_os

ggsave(file.path(plot_dir, "forest_add_OS_resection_lgg_group_cluster_assignment_spliceosome_score_CLK1_Ex4_TPM.pdf"),
       forest_efs,
       width = 10, height = 6, units = "in",
       device = "pdf")

Filter for clusters

cluster_list <- unique(metadata$cluster)

for (each in cluster_list) {
cluster_df <- metadata %>%
  dplyr::filter(cluster == each,
                !is.na(EFS_days)) %>%
  dplyr::mutate(CLK1_TPM_group = case_when(
      CLK1_Ex4_TPM > summary(CLK1_Ex4_TPM)[\3rd Qu.\] ~ \High CLK1 TPM\,
      CLK1_Ex4_TPM < summary(CLK1_Ex4_TPM)[\1st Qu.\] ~ \Low CLK1 TPM\,
      TRUE ~ NA_character_),
      CLK1_PSI_group = case_when(CLK1_ex4_PSI > summary(CLK1_ex4_PSI)[\3rd Qu.\] ~ \High CLK1 PSI\,
      CLK1_ex4_PSI < summary(CLK1_ex4_PSI)[\1st Qu.\] ~ \Low CLK1 PSI\,
      TRUE ~ NA_character_
    )) %>%
  dplyr::mutate(CLK1_TPM_group = fct_relevel(CLK1_TPM_group,
                                                 c(\Low CLK1 TPM\, \High CLK1 TPM\)),
                CLK1_PSI_group = fct_relevel(CLK1_PSI_group,
                                                 c(\Low CLK1 PSI\, \High CLK1 PSI\))) %>%
 # collapse groups which do not have min N
   dplyr::mutate(
    plot_group = forcats::fct_drop(plot_group),
    plot_group = forcats::fct_lump_min(plot_group, min = 3, other_level = \Collapsed\)
  )

 safe_each <- gsub(\[^A-Za-z0-9_-]+\, \_\, each)

# Generate KM models with `CLK1_TPM_group` as covariate
# Generate kaplan meier survival models for OS and EFS, and save outputs
cluster_clk_tpm_kap_os <- survival_analysis(
  metadata  = cluster_df %>% 
    dplyr::filter(!is.na(CLK1_TPM_group)),
  ind_var = \CLK1_TPM_group\,
  test = \kap.meier\,
  metadata_sample_col = \Kids_First_Biospecimen_ID\,
  days_col = \OS_days\,
  status_col = \OS_status\
)

readr::write_rds(cluster_clk_tpm_kap_os,
                 file.path(results_dir, paste0( \logrank_\, safe_each, \_OS_clk1_tpm_group.RDS\)))

cluster_clk_tpm_kap_efs <- survival_analysis(
  metadata  = cluster_df %>% 
    dplyr::filter(!is.na(CLK1_TPM_group)),
  ind_var = \CLK1_TPM_group\,
  test = \kap.meier\,
  metadata_sample_col = \Kids_First_Biospecimen_ID\,
  days_col = \EFS_days\,
  status_col = \EFS_status\
)

readr::write_rds(cluster_clk_tpm_kap_efs,
                 file.path(results_dir, paste0( \logrank_\, safe_each, \_EFS_clk1_tpm_group.RDS\)))

# Generate KM models with `CLK1_PSI_group` as covariate
# Generate kaplan meier survival models for OS and EFS, and save outputs
cluster_clk_psi_kap_os <- survival_analysis(
  metadata  = cluster_df %>% 
    dplyr::filter(!is.na(CLK1_PSI_group)),
  ind_var = \CLK1_PSI_group\,
  test = \kap.meier\,
  metadata_sample_col = \Kids_First_Biospecimen_ID\,
  days_col = \OS_days\,
  status_col = \OS_status\
)

readr::write_rds(cluster_clk_psi_kap_os,
                 file.path(results_dir, paste0( \logrank_\, safe_each, \_OS_clk1_psi_group.RDS\)))

cluster_clk_psi_kap_efs <- survival_analysis(
  metadata  = cluster_df %>% 
    dplyr::filter(!is.na(CLK1_PSI_group)),
  ind_var = \CLK1_PSI_group\,
  test = \kap.meier\,
  metadata_sample_col = \Kids_First_Biospecimen_ID\,
  days_col = \EFS_days\,
  status_col = \EFS_status\
)

readr::write_rds(cluster_clk_psi_kap_efs,
                 file.path(results_dir, paste0( \logrank_\, safe_each, \_EFS_clk1_psi_group.RDS\)))

# Generate cluster KM SI_group plots

km_cluster_clk_tpm_os_plot <- plotKM(model = cluster_clk_tpm_kap_os,
                    variable = \CLK1_TPM_group\,
                    combined = F, 
                    title = paste0(each, \

---
title: "Run splicing cluster assignment, splicing burden, gsva, CLK1"
output: 
  html_notebook:
    toc: TRUE
    toc_float: TRUE
author: Ryan Corbett, adapted by Jo Lynne Rokita for CLK1
date: 2024
params:
  plot_ci: TRUE
---

**Purpose:** 

Runs survival analysis models using splicing cluster assignment and 1) single exon splicing burden index (SBI) 2) KEGG Spliceosome GSVA scores or 3) CLK1 exon 4 TPM as a predictor

## Usage 

Uses a wrapper function (`survival_analysis`) from utils folder. 

## Setup

#### Packages and functions

Load packages, set directory paths and call setup script

```{r}
library(tidyverse)
library(survival)
library(ggpubr)
library(ggplot2)
library(patchwork)

root_dir <- rprojroot::find_root(rprojroot::has_dir(".git"))

data_dir <- file.path(root_dir, "data")
analysis_dir <- file.path(root_dir, "analyses", "survival")
input_dir <- file.path(analysis_dir, "results")
results_dir <- file.path(analysis_dir, "results")
plot_dir <- file.path(analysis_dir, "plots")

# If the input and results directories do not exist, create it
if (!dir.exists(results_dir)) {
  dir.create(results_dir, recursive = TRUE)
}

source(file.path(analysis_dir, "util", "survival_models.R"))
knitr::opts_chunk$set(cache = FALSE)
```

Set metadata and cluster assignment file paths

```{r set paths}
metadata_file <- file.path(input_dir, "splicing_indices_with_survival.tsv")

cluster_file <- file.path(root_dir, "analyses",
                          "sample-psi-clustering", "results",
                          "sample-cluster-metadata-top-5000-events-stranded.tsv")

kegg_scores_stranded_file <- file.path(root_dir, "analyses",
                          "sample-psi-clustering", "results",
                          "gsva_output_stranded.tsv")

tpm_file <- file.path(data_dir, "rna-isoform-expression-rsem-tpm.rds")
clk1_psi_file <- file.path(root_dir, 
                           "analyses", 
                           "CLK1-splicing_correlations", 
                           "results", 
                           "clk1-exon4-psi.tsv")
```

Wrangle data 
Add cluster assignment and spliceosome gsva scores to `metadata` and define column `lgg_group` (LGG or non_LGG)

```{r}
metadata <- read_tsv(metadata_file)

clusters <- read_tsv(cluster_file) %>%
  dplyr::rename(Kids_First_Biospecimen_ID = sample_id)

clk1_psi <- read_tsv(clk1_psi_file) %>%
  dplyr::rename(CLK1_ex4_PSI = PSI) %>%
  select(-plot_group)

gsva_scores <- read_tsv(kegg_scores_stranded_file) %>%
  dplyr::filter(geneset == "KEGG_SPLICEOSOME") %>%
  dplyr::rename(spliceosome_gsva_score = score)

all_clk4_transcr_counts <- readRDS(tpm_file) %>%
  filter(grepl("^CLK1", gene_symbol)) %>%
  mutate(
    transcript_id = case_when(
      transcript_id %in% c("ENST00000321356.9", "ENST00000434813.3", "ENST00000409403.6") ~ "Exon 4",
      # transcript_id == "ENST00000321356.9" ~ "Exon 4",
      TRUE ~ "Other"
    )
  ) %>%
  group_by(transcript_id) %>%
  summarise(across(starts_with("BS"), sum, na.rm = TRUE)) %>%
  pivot_longer(cols = -transcript_id, names_to = "Kids_First_Biospecimen_ID", values_to = "CLK1_Ex4_TPM") %>%
  filter(transcript_id == "Exon 4") %>%
  inner_join(clusters, by = "Kids_First_Biospecimen_ID") %>%
  left_join(clk1_psi)

# how many clusters?
n_clust <- length(unique(clusters$cluster))

metadata <- metadata %>%
  right_join(all_clk4_transcr_counts %>% dplyr::select(Kids_First_Biospecimen_ID,
                                       cluster, CLK1_Ex4_TPM, CLK1_ex4_PSI)) %>%
  left_join(gsva_scores %>% dplyr::select(sample_id,
                                          spliceosome_gsva_score),
            by = c("Kids_First_Biospecimen_ID" = "sample_id")) %>% 
  dplyr::mutate(cluster = glue::glue("Cluster {cluster}")) %>%
  dplyr::mutate(cluster = fct_relevel(cluster, paste0("Cluster ", 1:n_clust))) %>%
  dplyr::mutate(lgg_group = case_when(
    plot_group == "Low-grade glioma" ~ "LGG",
    TRUE ~ "non-LGG"
  )) %>%
  dplyr::mutate(SBI = SI_Total * 10) %>%
  dplyr::mutate(age_at_diagnosis_years = age_at_diagnosis_days/365.25)
```

Generate coxph models including extent of tumor resection, lgg group, cluster assignment, SBI, and CLK1 exon 4 TPM as covariates

```{r}
add_model_os <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
                              terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+SBI+CLK1_Ex4_TPM",
                               file.path(results_dir, "cox_OS_additive_terms_resection_lgg_group_cluster_SBI_CLK1_Ex4_TPM.RDS"),
                               "multivariate",
                               years_col = "OS_years",
                               status_col = "OS_status")

forest_os <- plotForest(readRDS(file.path(results_dir, "cox_OS_additive_terms_resection_lgg_group_cluster_SBI_CLK1_Ex4_TPM.RDS")))

forest_os

ggsave(file.path(plot_dir, "forest_add_OS_resection_lgg_group_cluster_assignment_SBI_CLK1_Ex4_TPM.pdf"),
       forest_os,
       width = 10, height = 6, units = "in",
       device = "pdf")

add_model_efs <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
                              terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+SBI+CLK1_Ex4_TPM",
                               file.path(results_dir, "cox_EFS_additive_terms_resection_lgg_group_cluster_SBI_CLK1_Ex4_TPM.RDS"),
                               "multivariate",
                               years_col = "EFS_years",
                               status_col = "EFS_status")

forest_efs <- plotForest(readRDS(file.path(results_dir, "cox_EFS_additive_terms_resection_lgg_group_cluster_SBI_CLK1_Ex4_TPM.RDS")))

forest_efs

ggsave(file.path(plot_dir, "forest_add_EFS_resection_lgg_group_cluster_assignment_SBI_CLK1_Ex4_TPM.pdf"),
       forest_efs,
       width = 10, height = 6, units = "in",
       device = "pdf")
```
repeat analysis with CLK1 exon 4 TPM alone

```{r}
add_model_os <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
                              terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+CLK1_Ex4_TPM",
                               file.path(results_dir, "cox_OS_additive_terms_resection_lgg_group_cluster_CLK1_Ex4_TPM.RDS"),
                               "multivariate",
                               years_col = "OS_years",
                               status_col = "OS_status")

forest_os <- plotForest(readRDS(file.path(results_dir, "cox_OS_additive_terms_resection_lgg_group_cluster_CLK1_Ex4_TPM.RDS")))

forest_os

ggsave(file.path(plot_dir, "forest_add_OS_resection_lgg_group_cluster_assignment_CLK1_Ex4_TPM.pdf"),
       forest_os,
       width = 10, height = 6, units = "in",
       device = "pdf")


add_model_efs <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
                              terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+CLK1_Ex4_TPM",
                               file.path(results_dir, "cox_EFS_additive_terms_resection_lgg_group_cluster_CLK1_Ex4_TPM.RDS"),
                               "multivariate",
                               years_col = "EFS_years",
                               status_col = "EFS_status")

forest_efs <- plotForest(readRDS(file.path(results_dir, "cox_EFS_additive_terms_resection_lgg_group_cluster_CLK1_Ex4_TPM.RDS")))

forest_efs

ggsave(file.path(plot_dir, "forest_add_EFS_resection_lgg_group_cluster_assignment_CLK1_Ex4_TPM.pdf"),
       forest_efs,
       width = 10, height = 6, units = "in",
       device = "pdf")


```


repeat analysis with CLK1 exon 4 PSI

```{r}
add_model_os <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
                              terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+CLK1_ex4_PSI",
                               file.path(results_dir, "cox_OS_additive_terms_resection_lgg_group_cluster_CLK1_ex4_PSI.RDS"),
                               "multivariate",
                               years_col = "OS_years",
                               status_col = "OS_status")

forest_os <- plotForest(readRDS(file.path(results_dir, "cox_OS_additive_terms_resection_lgg_group_cluster_CLK1_ex4_PSI.RDS")))

forest_os

ggsave(file.path(plot_dir, "forest_add_OS_resection_lgg_group_cluster_assignment_CLK1_ex4_PSI.pdf"),
       forest_os,
       width = 10, height = 6, units = "in",
       device = "pdf")

add_model_efs <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
                              terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+CLK1_ex4_PSI",
                               file.path(results_dir, "cox_EFS_additive_terms_resection_lgg_group_cluster_CLK1_ex4_PSI.RDS"),
                               "multivariate",
                               years_col = "EFS_years",
                               status_col = "EFS_status")

forest_efs <- plotForest(readRDS(file.path(results_dir, "cox_EFS_additive_terms_resection_lgg_group_cluster_CLK1_ex4_PSI.RDS")))

forest_efs

ggsave(file.path(plot_dir, "forest_add_EFS_resection_lgg_group_cluster_assignment_CLK1_ex4_PSI.pdf"),
       forest_efs,
       width = 10, height = 6, units = "in",
       device = "pdf")
```


Interaction with GSVA, SBI, CLK1

```{r Interaction models with GSVA, SBI, CLK1}
models <- c("spliceosome_gsva_score", "SBI", "CLK1_Ex4_TPM", "CLK1_ex4_PSI")
# by cluster
for (each in models) {
  int_model_efs <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
                                terms = paste0("extent_of_tumor_resection+lgg_group+cluster*", each, "+age_at_diagnosis_years"),
                                 file.path(results_dir, paste0("cox_EFS_interaction_terms_resection_lgg_group_cluster_", each, ".RDS")),
                                 "multivariate",
                                 years_col = "EFS_years",
                                 status_col = "EFS_status")
  
  int_forest_efs <- plotForest(readRDS(file.path(results_dir, paste0("cox_EFS_interaction_terms_resection_lgg_group_cluster_", each, ".RDS"))))
  
  int_forest_efs
  
  ggsave(file.path(plot_dir, paste0("forest_int_EFS_resection_lgg_group_cluster_assignment_", each, ".pdf")),
         int_forest_efs,
         width = 10, height = 6, units = "in",
         device = "pdf")

  int_model_os <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
                                terms = paste0("extent_of_tumor_resection+lgg_group+cluster*", each, "+age_at_diagnosis_years"),
                                 file.path(results_dir, paste0("cox_OS_interaction_terms_resection_lgg_group_cluster_", each, ".RDS")),
                                 "multivariate",
                                 years_col = "OS_years",
                                 status_col = "OS_status")
  
  int_forest_os <- plotForest(readRDS(file.path(results_dir, paste0("cox_OS_interaction_terms_resection_lgg_group_cluster_", each, ".RDS"))))
  
  int_forest_os
  
  ggsave(file.path(plot_dir, paste0("forest_int_OS_resection_lgg_group_cluster_assignment_", each, ".pdf")),
         int_forest_os,
         width = 10, height = 6, units = "in",
         device = "pdf")
}

## clk1 x age
  int_model_efs <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
                                terms = paste0("extent_of_tumor_resection+lgg_group+cluster+CLK1_Ex4_TPM*age_at_diagnosis_years"),
                                 file.path(results_dir, paste0("cox_EFS_interaction_terms_resection_lgg_group_cluster_CLK1_Ex4_TPM_age.RDS")),
                                 "multivariate",
                                 years_col = "EFS_years",
                                 status_col = "EFS_status")
  
  int_forest_efs <- plotForest(readRDS(file.path(results_dir, paste0("cox_EFS_interaction_terms_resection_lgg_group_cluster_CLK1_Ex4_TPM_age.RDS"))))
  
  int_forest_efs
  
  ggsave(file.path(plot_dir, paste0("forest_int_EFS_resection_lgg_group_cluster_clk1_age.pdf")),
         int_forest_efs,
         width = 10, height = 6, units = "in",
         device = "pdf")

  int_model_os <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
                                terms = paste0("extent_of_tumor_resection+lgg_group+cluster+CLK1_Ex4_TPM*age_at_diagnosis_years"),
                                 file.path(results_dir, paste0("cox_OS_interaction_terms_resection_lgg_group_cluster_clk1_age.RDS")),
                                 "multivariate",
                                 years_col = "OS_years",
                                 status_col = "OS_status")
  
  int_forest_os <- plotForest(readRDS(file.path(results_dir, paste0("cox_OS_interaction_terms_resection_lgg_group_cluster_clk1_age.RDS"))))
  
  int_forest_os
  
  ggsave(file.path(plot_dir, paste0("forest_int_OS_resection_lgg_group_cluster_clk1_age.pdf")),
         int_forest_os,
         width = 10, height = 6, units = "in",
         device = "pdf")

models2 <- c("SBI", "CLK1_Ex4_TPM", "CLK1_ex4_PSI")
for (each in models2) {
  #### by spliceosome_gsva_score
  int_model_efs <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
                                terms = paste0("extent_of_tumor_resection+lgg_group+cluster+spliceosome_gsva_score*", each, "+age_at_diagnosis_years"),
                                 file.path(results_dir, paste0("cox_EFS_interaction_terms_resection_lgg_group_cluster_spliceosome_gsva_score_", each, ".RDS")),
                                 "multivariate",
                                 years_col = "EFS_years",
                                 status_col = "EFS_status")
  
  int_forest_efs <- plotForest(readRDS(file.path(results_dir, paste0("cox_EFS_interaction_terms_resection_lgg_group_cluster_spliceosome_gsva_score_", each, ".RDS"))))
  
  int_forest_efs
  
  ggsave(file.path(plot_dir, paste0("cox_EFS_interaction_terms_resection_lgg_group_cluster_spliceosome_gsva_score_", each, ".pdf")),
         int_forest_efs,
         width = 10, height = 6, units = "in",
         device = "pdf")
}
  


add_model_efs <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
                              terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+spliceosome_gsva_score+CLK1_Ex4_TPM",
                               file.path(results_dir, "cox_EFS_additive_terms_resection_lgg_group_cluster_spliceosome_score_CLK1_Ex4_TPM.RDS"),
                               "multivariate",
                               years_col = "EFS_years",
                               status_col = "EFS_status")

forest_efs <- plotForest(readRDS(file.path(results_dir, "cox_EFS_additive_terms_resection_lgg_group_cluster_spliceosome_score_CLK1_Ex4_TPM.RDS")))

forest_efs

ggsave(file.path(plot_dir, "forest_add_EFS_resection_lgg_group_cluster_assignment_spliceosome_score_CLK1_Ex4_TPM.pdf"),
       forest_efs,
       width = 10, height = 6, units = "in",
       device = "pdf")


add_model_os <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
                              terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+spliceosome_gsva_score+CLK1_Ex4_TPM",
                               file.path(results_dir, "cox_OS_additive_terms_resection_lgg_group_cluster_spliceosome_score_CLK1_Ex4_TPM.RDS"),
                               "multivariate",
                               years_col = "EFS_years",
                               status_col = "EFS_status")

forest_os <- plotForest(readRDS(file.path(results_dir, "cox_OS_additive_terms_resection_lgg_group_cluster_spliceosome_score_CLK1_Ex4_TPM.RDS")))

forest_os

ggsave(file.path(plot_dir, "forest_add_OS_resection_lgg_group_cluster_assignment_spliceosome_score_CLK1_Ex4_TPM.pdf"),
       forest_efs,
       width = 10, height = 6, units = "in",
       device = "pdf")


```



Filter for clusters

```{r}

cluster_list <- unique(metadata$cluster)

for (each in cluster_list) {
cluster_df <- metadata %>%
  dplyr::filter(cluster == each,
                !is.na(EFS_days)) %>%
  dplyr::mutate(CLK1_TPM_group = case_when(
      CLK1_Ex4_TPM > summary(CLK1_Ex4_TPM)["3rd Qu."] ~ "High CLK1 TPM",
      CLK1_Ex4_TPM < summary(CLK1_Ex4_TPM)["1st Qu."] ~ "Low CLK1 TPM",
      TRUE ~ NA_character_),
      CLK1_PSI_group = case_when(CLK1_ex4_PSI > summary(CLK1_ex4_PSI)["3rd Qu."] ~ "High CLK1 PSI",
      CLK1_ex4_PSI < summary(CLK1_ex4_PSI)["1st Qu."] ~ "Low CLK1 PSI",
      TRUE ~ NA_character_
    )) %>%
  dplyr::mutate(CLK1_TPM_group = fct_relevel(CLK1_TPM_group,
                                                 c("Low CLK1 TPM", "High CLK1 TPM")),
                CLK1_PSI_group = fct_relevel(CLK1_PSI_group,
                                                 c("Low CLK1 PSI", "High CLK1 PSI"))) %>%
 # collapse groups which do not have min N
   dplyr::mutate(
    plot_group = forcats::fct_drop(plot_group),
    plot_group = forcats::fct_lump_min(plot_group, min = 3, other_level = "Collapsed")
  )

 safe_each <- gsub("[^A-Za-z0-9_-]+", "_", each)

# Generate KM models with `CLK1_TPM_group` as covariate
# Generate kaplan meier survival models for OS and EFS, and save outputs
cluster_clk_tpm_kap_os <- survival_analysis(
  metadata  = cluster_df %>% 
    dplyr::filter(!is.na(CLK1_TPM_group)),
  ind_var = "CLK1_TPM_group",
  test = "kap.meier",
  metadata_sample_col = "Kids_First_Biospecimen_ID",
  days_col = "OS_days",
  status_col = "OS_status"
)

readr::write_rds(cluster_clk_tpm_kap_os,
                 file.path(results_dir, paste0( "logrank_", safe_each, "_OS_clk1_tpm_group.RDS")))

cluster_clk_tpm_kap_efs <- survival_analysis(
  metadata  = cluster_df %>% 
    dplyr::filter(!is.na(CLK1_TPM_group)),
  ind_var = "CLK1_TPM_group",
  test = "kap.meier",
  metadata_sample_col = "Kids_First_Biospecimen_ID",
  days_col = "EFS_days",
  status_col = "EFS_status"
)

readr::write_rds(cluster_clk_tpm_kap_efs,
                 file.path(results_dir, paste0( "logrank_", safe_each, "_EFS_clk1_tpm_group.RDS")))

# Generate KM models with `CLK1_PSI_group` as covariate
# Generate kaplan meier survival models for OS and EFS, and save outputs
cluster_clk_psi_kap_os <- survival_analysis(
  metadata  = cluster_df %>% 
    dplyr::filter(!is.na(CLK1_PSI_group)),
  ind_var = "CLK1_PSI_group",
  test = "kap.meier",
  metadata_sample_col = "Kids_First_Biospecimen_ID",
  days_col = "OS_days",
  status_col = "OS_status"
)

readr::write_rds(cluster_clk_psi_kap_os,
                 file.path(results_dir, paste0( "logrank_", safe_each, "_OS_clk1_psi_group.RDS")))

cluster_clk_psi_kap_efs <- survival_analysis(
  metadata  = cluster_df %>% 
    dplyr::filter(!is.na(CLK1_PSI_group)),
  ind_var = "CLK1_PSI_group",
  test = "kap.meier",
  metadata_sample_col = "Kids_First_Biospecimen_ID",
  days_col = "EFS_days",
  status_col = "EFS_status"
)

readr::write_rds(cluster_clk_psi_kap_efs,
                 file.path(results_dir, paste0( "logrank_", safe_each, "_EFS_clk1_psi_group.RDS")))

# Generate cluster KM SI_group plots

km_cluster_clk_tpm_os_plot <- plotKM(model = cluster_clk_tpm_kap_os,
                    variable = "CLK1_TPM_group",
                    combined = F, 
                    title = paste0(each, ", overall survival"),
                    p_pos = "topright")

ggsave(file.path(plot_dir, paste0("km_", safe_each, "_OS_clk1_tpm_group.pdf")),
       km_cluster_clk_tpm_os_plot,
       width = 8, height = 5, units = "in",
       device = "pdf")

km_cluster_clk1_tpm_efs_plot <- plotKM(model = cluster_clk_tpm_kap_efs,
                    variable = "CLK1_TPM_group",
                    combined = F, 
                    title = paste0(each, ", event-free survival"),
                    p_pos = "topright")

ggsave(file.path(plot_dir, paste0( "km_", safe_each, "_EFS_clk1_tpm_group.pdf")), 
       km_cluster_clk1_tpm_efs_plot,
       width = 8, height = 5, units = "in",
       device = "pdf")

km_cluster_clk1_psi_os_plot <- plotKM(model = cluster_clk_psi_kap_os,
                    variable = "CLK1_PSI_group",
                    combined = F, 
                    title = paste0(each, ", overall survival"),
                    p_pos = "topright")

ggsave(file.path(plot_dir, paste0( "km_", safe_each, "_OS_clk1_psi_group.pdf")),
       km_cluster_clk1_psi_os_plot,
       width = 8, height = 5, units = "in",
       device = "pdf")

km_cluster_clk1_psi_efs_plot <- plotKM(model = cluster_clk_psi_kap_efs,
                    variable = "CLK1_PSI_group",
                    combined = F, 
                    title = paste0(each, ", event-free survival"),
                    p_pos = "topright")

ggsave(file.path(plot_dir, paste0("km_", safe_each, "_EFS_clk1_psi_group.pdf")), 
       km_cluster_clk1_psi_efs_plot,
       width = 8, height = 5, units = "in",
       device = "pdf")

# Assess EFS and OS by CLK1 TPM group in multivariate models and generate forest plots

add_model_cluster_efs <- fit_save_model(cluster_df %>% 
                                      dplyr::filter(extent_of_tumor_resection != "Unavailable",
                                                    CLK1_TPM_group %in% c("High CLK1 TPM", "Low CLK1 TPM")) %>%
                                 dplyr::mutate(plot_group = forcats::fct_relevel(plot_group, "Low-grade glioma", after = 0)),
                              terms = "extent_of_tumor_resection+age_at_diagnosis_years+plot_group+CLK1_TPM_group",
                               file.path(results_dir, paste0( "cox_EFS_additive_terms_subtype_", safe_each, "_clk1_tpm_group.RDS")),
                               "multivariate",
                               years_col = "EFS_years",
                               status_col = "EFS_status")

forest_cluster_clk1_efs <- plotForest(readRDS(file.path(results_dir, paste0("cox_EFS_additive_terms_subtype_", safe_each, "_clk1_tpm_group.RDS"))))

ggsave(file.path(plot_dir, paste0("forest_add_EFS_", safe_each, "_histology_resection_clk1_tpm_group.pdf")),
       forest_cluster_clk1_efs,
       width = 9, height = 4, units = "in",
       device = "pdf")

add_model_cluster_os <- fit_save_model(cluster_df %>% 
                                    dplyr::filter(!extent_of_tumor_resection %in% c("Not Reported", "Unavailable")) %>%
                                 dplyr::mutate(plot_group = forcats::fct_relevel(plot_group, "Low-grade glioma", after = 0)),
                              terms = "extent_of_tumor_resection+age_at_diagnosis_years+plot_group+CLK1_TPM_group",
                               file.path(results_dir, paste0( "cox_OS_additive_terms_subtype_", safe_each, "_clk1_tpm_group.RDS")),
                               "multivariate",
                               years_col = "OS_years",
                               status_col = "OS_status")

forest_cluster_clk1_os <- plotForest(readRDS(file.path(results_dir, paste0( "cox_OS_additive_terms_subtype_", safe_each, "_clk1_tpm_group.RDS"))))

ggsave(file.path(plot_dir, paste0( "forest_add_OS_", safe_each, "_histology_resection_clk1_tpm_group.pdf")),
       forest_cluster_clk1_os,
       width = 9, height = 4, units = "in",
       device = "pdf")

#Assess EFS and OS by CLK1 PSI group in multivariate models and generate forest plots

add_model_cluster_efs <- fit_save_model(cluster_df %>% 
                                      dplyr::filter(extent_of_tumor_resection != "Unavailable",
                                                    CLK1_PSI_group %in% c("High CLK1 PSI", "Low CLK1 PSI"))%>%
                                 dplyr::mutate(plot_group = forcats::fct_relevel(plot_group, "Low-grade glioma", after = 0)),
                              terms = "extent_of_tumor_resection+age_at_diagnosis_years+plot_group+CLK1_PSI_group",
                               file.path(results_dir, paste0( "cox_EFS_additive_terms_subtype_", safe_each, "_clk1_psi_group.RDS")),
                               "multivariate",
                               years_col = "EFS_years",
                               status_col = "EFS_status")

forest_cluster_clk1_efs <- plotForest(readRDS(file.path(results_dir, paste0( "cox_EFS_additive_terms_subtype_", safe_each, "_clk1_psi_group.RDS"))))

ggsave(file.path(plot_dir, paste0( "forest_add_EFS_", safe_each, "_histology_resection_clk1_psi_group.pdf")),
       forest_cluster_clk1_efs,
       width = 9, height = 4, units = "in",
       device = "pdf")

add_model_cluster_os <- fit_save_model(cluster_df %>% 
                                    dplyr::filter(!extent_of_tumor_resection %in% c("Not Reported", "Unavailable")) %>%
                                 dplyr::mutate(plot_group = forcats::fct_relevel(plot_group, "Low-grade glioma", after = 0)),
                              terms = "extent_of_tumor_resection+age_at_diagnosis_years+plot_group+CLK1_PSI_group",
                               file.path(results_dir, paste0( "cox_OS_additive_terms_subtype_", safe_each, "_clk1_psi_group.RDS")),
                               "multivariate",
                               years_col = "OS_years",
                               status_col = "OS_status")

forest_cluster_clk1_os <- plotForest(readRDS(file.path(results_dir, paste0( "cox_OS_additive_terms_subtype_", safe_each, "_clk1_psi_group.RDS"))))

ggsave(file.path(plot_dir, paste0( "forest_add_OS_", safe_each, "_histology_resection_clk1_psi_group.pdf")),
       forest_cluster_clk1_os,
       width = 9, height = 4, units = "in",
       device = "pdf")

# Assess EFS and OS by CLK1 ex 4 TPM in multivariate models and generate forest plots

add_model_cluster_efs <- fit_save_model(cluster_df %>% 
                                      dplyr::filter(extent_of_tumor_resection != "Unavailable") %>%
                                 dplyr::mutate(plot_group = forcats::fct_relevel(plot_group, "Low-grade glioma", after = 0)),
                              terms = "extent_of_tumor_resection+age_at_diagnosis_years+plot_group+CLK1_Ex4_TPM",
                               file.path(results_dir, paste0( "cox_EFS_additive_terms_subtype_", safe_each, "_clk1_tpm.RDS")),
                               "multivariate",
                               years_col = "EFS_years",
                               status_col = "EFS_status")

forest_cluster_clk1_efs <- plotForest(readRDS(file.path(results_dir, paste0( "cox_EFS_additive_terms_subtype_", safe_each, "_clk1_tpm.RDS"))))

ggsave(file.path(plot_dir, paste0( "forest_add_EFS_", safe_each, "_histology_resection_clk1_tpm.pdf")),
       forest_cluster_clk1_efs,
       width = 9, height = 4, units = "in",
       device = "pdf")

add_model_cluster_os <- fit_save_model(cluster_df %>% 
                                    dplyr::filter(!extent_of_tumor_resection %in% c("Not Reported", "Unavailable")) %>%
                                 dplyr::mutate(plot_group = forcats::fct_relevel(plot_group, "Low-grade glioma", after = 0)),
                              terms = "extent_of_tumor_resection+age_at_diagnosis_years+plot_group+CLK1_Ex4_TPM",
                               file.path(results_dir, paste0( "cox_OS_additive_terms_subtype_", safe_each, "_clk1_tpm.RDS")),
                               "multivariate",
                               years_col = "OS_years",
                               status_col = "OS_status")

forest_cluster_clk1_os <- plotForest(readRDS(file.path(results_dir, paste0( "cox_OS_additive_terms_subtype_", safe_each, "_clk1_tpm.RDS"))))

ggsave(file.path(plot_dir, paste0( "forest_add_OS_", safe_each, "_histology_resection_clk1_tpm.pdf")),
       forest_cluster_clk1_os,
       width = 9, height = 4, units = "in",
       device = "pdf")

# Assess EFS and OS by CLK1 ex 4 PSI in multivariate models and generate forest plots

add_model_cluster_efs <- fit_save_model(cluster_df %>% 
                                      dplyr::filter(extent_of_tumor_resection != "Unavailable") %>%
                                 dplyr::mutate(plot_group = forcats::fct_relevel(plot_group, "Low-grade glioma", after = 0)),
                              terms = "extent_of_tumor_resection+age_at_diagnosis_years+plot_group+CLK1_ex4_PSI",
                               file.path(results_dir, paste0( "cox_EFS_additive_terms_subtype_", safe_each, "_clk1_psi.RDS")),
                               "multivariate",
                               years_col = "EFS_years",
                               status_col = "EFS_status")

forest_cluster_clk1_efs <- plotForest(readRDS(file.path(results_dir, paste0( "cox_EFS_additive_terms_subtype_", safe_each, "_clk1_psi.RDS"))))

ggsave(file.path(plot_dir, paste0( "forest_add_EFS_", safe_each, "_histology_resection_clk1_psi.pdf")),
       forest_cluster_clk1_efs,
       width = 9, height = 4, units = "in",
       device = "pdf")

add_model_cluster_os <- fit_save_model(cluster_df %>% 
                                    dplyr::filter(!extent_of_tumor_resection %in% c("Not Reported", "Unavailable")) %>%
                                 dplyr::mutate(plot_group = forcats::fct_relevel(plot_group, "Low-grade glioma", after = 0)),
                              terms = "extent_of_tumor_resection+age_at_diagnosis_years+plot_group+CLK1_ex4_PSI",
                               file.path(results_dir, paste0( "cox_OS_additive_terms_subtype_", safe_each, "_clk1_psi.RDS")),
                               "multivariate",
                               years_col = "OS_years",
                               status_col = "OS_status")

forest_cluster_clk1_os <- plotForest(readRDS(file.path(results_dir, paste0( "cox_OS_additive_terms_subtype_", safe_each, "_clk1_psi.RDS"))))

ggsave(file.path(plot_dir, paste0( "forest_add_OS_", each, "_histology_resection_clk1_psi.pdf")),
       forest_cluster_clk1_os,
       width = 9, height = 4, units = "in",
       device = "pdf")
}
```


# Print session Info

```{r}
sessionInfo()
```